* Title: 	cw_puma2000_migpuma2000.do
* Version: 	23 May 2018
* Purpose: 	Create crosswalk between MIGPUMAs and PUMAs for 2000 Census data


*******************************************************************************
* (0) Start of file
*******************************************************************************

* Interactive paging off so the script runs unattended
set more off

* Close a log left open by an earlier run (no error if none is open), then
* start this script's log, overwriting any previous copy
capture log close
log using log/cw_migpuma, replace

* Start from an empty session: no data or stored results, no leftover macros
clear all
macro drop _all


*******************************************************************************
* (1) Load raw crosswalk from IPUMS and clean spreadsheet
*******************************************************************************

* Read the raw spreadsheet (first row holds the variable names) and keep only
* the three columns used below.
import excel using dta/raw/cw_migpuma_raw.xlsx, firstrow
keep statefip migpuma puma

* Fill blank state codes down from the closest non-blank row above.
* Explicit subscripting is used instead of tsset + L.statefip: replace works
* through the observations in order, so a value filled in on one row is what
* the next row sees, and runs of several consecutive blank rows are filled
* completely rather than only their first row.
replace statefip = statefip[_n-1] if mi(statefip)

* Prefix the MIGPUMA code with the state code so it is unique across states
replace migpuma = statefip * 100000 + migpuma
isid migpuma

* The puma column holds a comma-separated list: split it into puma1, puma2, ...
* and reshape so that every list entry sits on its own row.
split puma, p(",") 
drop puma
reshape long puma, i(statefip migpuma) j(j)
drop if puma == ""
drop j

* Each entry is either a single code or a "start-end" range. Split on the
* dash and convert to numeric; a single code is stored as a range whose end
* equals its start.
split puma, p("-") destring
drop puma
rename puma1 start_puma
rename puma2 end_puma
replace end_puma = start_puma if mi(end_puma)

* Prefix both range bounds with the state code, same scheme as migpuma above
foreach var of varlist *_puma {
	replace `var' = statefip * 100000 + `var'
}

* Keep state codes up to 56 only (presumably this removes territories --
* confirm against the raw file). Rows whose statefip is still missing are
* dropped too, because Stata orders missing above every number.
drop if statefip>56
tempfile cw_ipums
save `cw_ipums'


*******************************************************************************
* (3) Get list of 2000 PUMAs
*******************************************************************************

* Load only the year-2000 observations and the variables needed to build the
* state-prefixed PUMA code.
use year statefip puma if year == 2000 using dta/census_clean

* Same coding scheme as the crosswalk: state code * 100000 + PUMA
replace puma = statefip * 100000 + puma
keep puma

* Reduce to one row per PUMA, confirm uniqueness, and leave the data sorted
duplicates drop
isid puma, sort

tempfile puma2000
save `puma2000'


*******************************************************************************
* (4) Merge to create crosswalk
*******************************************************************************
use `puma2000', clear

* Attach the crosswalk ranges side by side, matched on observation number
* only. Row i of start_puma/end_puma is NOT related to the puma on row i; the
* two columns just serve as a lookup table for the loop below.
merge 1:1 _n using `cw_ipums', keepusing(start_puma end_puma) nogen

* For every range in the lookup table, stamp its bounds on each PUMA that
* falls inside it. Doubles are used so the 7-digit codes are stored exactly
* and compare equal to the merge keys in the crosswalk.
gen double lo_match = .
gen double hi_match = .
local n_rows = _N
forvalues i = 1/`n_rows' {
	local lo = start_puma[`i']
	local hi = end_puma[`i']
	* Ranges occupy the first rows of the table; the first blank ends it
	if mi(`lo') continue, break
	replace lo_match = `lo' if inrange(puma, `lo', `hi')
	replace hi_match = `hi' if inrange(puma, `lo', `hi')
}

* If the crosswalk has more rows than there are PUMAs, the positional merge
* above padded the data with rows that have no PUMA. They were only needed
* as lookup rows, so remove them before the real merge.
drop if mi(puma)

drop start_puma end_puma
ren lo_match start_puma
ren hi_match end_puma

* Bring in the MIGPUMA for each matched range. assert(matched) stops the
* script if any PUMA fell in no range or any range contains no PUMA.
merge m:1 start_puma end_puma using `cw_ipums', assert(matched) nogen

isid puma
sort puma
keep  puma migpuma
order puma migpuma

* Now, convert MIGPUMA codes to what is actually in IPUMS: set the last two
* digits to 00. Done arithmetically rather than through a string round-trip,
* which gives the same result for these positive integer codes.
replace migpuma = floor(migpuma / 100) * 100

*******************************************************************************
* (5) End of file
*******************************************************************************


* Final variable names for the saved crosswalk
rename (puma migpuma) (puma_code migpuma_code)

* Shrink storage types where possible and write the crosswalk to disk
compress
save dta/cw_puma2000_migpuma2000, replace

* Close the log and end the do-file, discarding the data in memory
log close
exit, clear
